rm(list=ls(all=TRUE))
#Set WD
setwd("")

#Packages
library(tidyverse)
library(stargazer)
library(ggpubr)
library(mediation)
library(readxl)

complete <- read_csv("complete.csv")                 # Individual-level data without missing variables, only individuals with vote choice
complete_2 <- read_csv("complete_2.csv")             # Individual-level data without missing variables, without vote choice, for turnout analysis
dyads <- read_csv("dyads.csv")   # Individual-party dyads
parlgov_cabinet <- read_excel("parlgov.xlsx", sheet="cabinet")

glimpse(complete)

####################################################################################################
# Figure 1, Graph showing party choice as a function of ideological closeness
####################################################################################################
inds_inanalysis <- unique(complete$ind)
dyads_filtered <- dyads %>%
  filter(ind %in% inds_inanalysis)

forgraph <- dyads_filtered %>%
  filter(!is.na(rank_bestparty_lr)) %>%
  group_by(rank_bestparty_lr) %>%
  summarize(dist_lr = mean(dist_lr, na.rm=TRUE),
            votedfor = mean(votedfor, na.rm=TRUE),
            n = n())

fig1_closeness <- forgraph %>%
  ggplot(aes(x=rank_bestparty_lr, y=votedfor*100)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  theme(aspect.ratio = 1,
        panel.border = element_rect(colour="black", fill=NA),
        panel.grid.minor.x = element_blank()) +
  labs(y="Percent voting for party",
       x="Party closeness ranking") +
  annotate("curve", xend=1.2, yend=30, x=4.3, y=28.5, arrow=arrow(length = unit(3, "mm"))) +
  annotate("text", x=4.5, y=27, hjust=00, vjust=0.5, label="Parties closest on the\nleft/right scale were\nchosen 29.7% of the time") +
  scale_x_continuous(breaks=c(1, 5, 10, 15))


png("Output/fig1_closeness.png", width=4, height=4, units="in", res=300)
fig1_closeness
dev.off()

####################################################################################################
# Aggregate for graphs
####################################################################################################

# Includes non-voters
bydeciles_all_turnout <- complete_2 %>%
  mutate(incperc = floor(incperc/10)*10+5,
         ind_lrscale = ind_lrscale*10) %>%
  group_by(incperc) %>%
  summarize(across(.cols=c(y1_cong, y1_cong_nomid, ind_lrscale, actual_dist, best_dist, voted, correct, actual_cabinet_party, best_cabinet_party, neither), ~ mean(.x, na.rm=TRUE)))

bydeciles_all <- complete %>%
  mutate(incperc = floor(incperc/10)*10+5,
         ind_lrscale = ind_lrscale*10) %>%
  group_by(incperc) %>%
  summarize(across(.cols=c(y1_cong, y1_cong_nomid, ind_lrscale, actual_dist, best_dist, voted, correct, actual_cabinet_party, best_cabinet_party, neither), ~ mean(.x, na.rm=TRUE)))

# Includes only valid
bydeciles_filtered <- complete %>%
  mutate(incperc = floor(incperc/10)*10+5,
         ind_lrscale = ind_lrscale*10) %>%
  group_by(incperc) %>%
  summarize(across(.cols=c(y1_cong, y1_cong_nomid, ind_lrscale, actual_dist, best_dist, voted, correct, actual_cabinet_party, best_cabinet_party, neither), ~ mean(.x, na.rm=TRUE)))

bydeciles_filtered_cabinet_actual <- complete %>%
  mutate(incperc = floor(incperc/10)*10+5,
         ind_lrscale = ind_lrscale*10) %>%
  group_by(incperc, actual_cabinet_party) %>%
  summarize(across(.cols=c(y1_cong, y1_cong_nomid, actual_dist, best_dist, voted, correct, best_cabinet_party), ~ mean(.x, na.rm=TRUE)), .groups="drop")

bydeciles_filtered_cabinet_best <- complete %>%
  mutate(incperc = floor(incperc/10)*10+5,
         ind_lrscale = ind_lrscale*10) %>%
  group_by(incperc, best_cabinet_party) %>%
  summarize(across(.cols=c(y1_cong, y1_cong_nomid, actual_dist, best_dist, voted, correct, actual_cabinet_party), ~ mean(.x, na.rm=TRUE)), .groups="drop")

####################################################################################################
# Figure 2, graph showing congruence as a function of income
####################################################################################################

fig2_congruence <- bydeciles_all %>%
  ggplot(aes(x=incperc, y=y1_cong*100)) +
  geom_hline(yintercept=50, linetype="dashed") +
  geom_line(data=bydeciles_filtered) +
  geom_point(data=bydeciles_filtered) +
  scale_x_continuous(breaks = seq(0, 100, by=10), limit=c(0, 100)) +
  lims(y=c(50, 60)) +
  labs(x="Income percentile (grouped in deciles)", y="Average congruence") +
  theme_minimal() +
  theme(legend.position = "none", panel.grid.minor = element_blank(),
        panel.border = element_rect(colour="black", fill=NA))

png("Output/fig2_congruence.png", width=4, height=4, units="in", res=300)
fig2_congruence
dev.off()



####################################################################################################
# Figure 3, combined graph
####################################################################################################

fig3_a <- bydeciles_all_turnout %>%
  ggplot(aes(x=incperc, y=voted)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 100, by=10), limit=c(0, 100)) +
  labs(x=element_blank(), title="(a) Turnout", y="Proportion voted (0-1)") +
  theme_minimal() +
  theme(legend.position = "none", panel.grid.minor = element_blank(),
        panel.border = element_rect(colour="black", fill=NA),
        plot.title = element_text(hjust=0.5),
        aspect.ratio = 1) +
  lims(y=c(0.7, 0.9))

fig3_b <- bydeciles_filtered %>%
  ggplot(aes(x=incperc, y=best_dist)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 100, by=10), limit=c(0, 100)) +
  labs(x=element_blank(), title="(b) Distance to ideologically\nclosest party", y="Ideological distance (0-10)") +
  theme_minimal() +
  theme(legend.position = "none", panel.grid.minor = element_blank(),
        panel.border = element_rect(colour="black", fill=NA),
        plot.title = element_text(hjust=0.5),
        aspect.ratio = 1) +
  lims(y=c(0.45, 0.6))

fig3_d <- bydeciles_filtered %>%
  ggplot(aes(x=incperc, y=actual_dist)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 100, by=10), limit=c(0, 100)) +
  labs(x=element_blank(), title="(d) Distance to\npreferred party", y="Ideological distance (0-10)") +
  theme_minimal() +
  theme(legend.position = "none", panel.grid.minor = element_blank(),
        panel.border = element_rect(colour="black", fill=NA),
        plot.title = element_text(hjust=0.5),
        aspect.ratio = 1) +
  lims(y=c(1.3, 1.7))

fig3_c <- bydeciles_filtered %>%
  ggplot(aes(x=incperc, y=correct)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 100, by=10), limit=c(0, 100)) +
  labs(x=element_blank(), title="(c) Prefer ideologically\nclosest party", y="Proportion preferring (0-1)") +
  theme_minimal() +
  theme(legend.position = "none", panel.grid.minor = element_blank(),
        panel.border = element_rect(colour="black", fill=NA),
        plot.title = element_text(hjust=0.5),
        aspect.ratio = 1) +
  lims(y=c(0.25, 0.35))

fig3_e <- bydeciles_filtered %>%
  ggplot(aes(x=incperc, y=actual_cabinet_party)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 100, by=10), limit=c(0, 100)) +
  labs(x="Income percentile (in deciles)", title="(e) Preferred party\nin government", y="Proportion in government (0-1)") +
  theme_minimal() +
  theme(legend.position = "none", panel.grid.minor = element_blank(),
        panel.border = element_rect(colour="black", fill=NA),
        plot.title = element_text(hjust=0.5),
        aspect.ratio = 1) +
  lims(y=c(0.45, 0.55))

fig3_f <- bydeciles_filtered %>%
  ggplot(aes(x=incperc, y=best_cabinet_party)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 100, by=10), limit=c(0, 100)) +
  labs(x="Income percentile (in deciles)", title="(f) Ideologically closest\nparty in government", y="Proportion in government (0-1)") +
  theme_minimal() +
  theme(legend.position = "none", panel.grid.minor = element_blank(),
        panel.border = element_rect(colour="black", fill=NA),
        plot.title = element_text(hjust=0.5),
        aspect.ratio = 1) +
  lims(y=c(0.325, 0.425))

png("Output/fig3_combined.png", width=6, height=9, units="in", res=300)
ggarrange(fig3_a, fig3_b, fig3_c, fig3_d, fig3_e, fig3_f, nrow=3, ncol=2, align=c("hv"))
dev.off()


####################################################################################################
# Table 1, regression analysis
####################################################################################################

m1 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + incperc, data=complete)
m2 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + actual_cabinet_party + incperc, data=complete)
m3 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + best_cabinet_party + incperc, data=complete)
m4 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + actual_dist + incperc, data=complete)
m5 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + best_dist + incperc, data=complete)
m6 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + best_votedfor + incperc, data=complete)
m7 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + actual_cabinet_party + best_cabinet_party + actual_dist + best_dist + incperc, data=complete)
m8 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + incperc, data=filter(complete_2, !is.na(voted)))
m9 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + voted + incperc, data=filter(complete_2, !is.na(voted)))

stargazer(m1, m2, m3, m4, m5, m6, m7, m8, m9,
          type="text", omit=c("country", "y0_year"), omit.stat=c("SER", "F"),
          star.cutoffs = c(0.05, 0.01))

####################################################################################################
# Appendix regressions
####################################################################################################

#####################
# Table A1
#####################
m1 <- lm(eval(y1_cong*100) ~ incperc, data=filter(complete))
m2 <- lm(eval(y1_cong*100) ~ as.factor(country) + as.factor(y0_year) + incperc, data=filter(complete))
stargazer(m1, m2,
          type="text", omit=c("country", "y0_year"), omit.stat=c("SER", "F"),
          star.cutoffs = c(0.05, 0.01))

#####################
# Table A2
#####################

m1 <- lm(eval(voted*100) ~ incperc, data=filter(complete_2))
m2 <- lm(eval(voted*100) ~ as.factor(country) + as.factor(y0_year) + incperc, data=filter(complete_2))

stargazer(m1, m2,
          type="text", omit=c("country", "y0_year"), omit.stat=c("SER", "F"),
          star.cutoffs = c(0.05, 0.01))

#####################
# Table A3
#####################

m1 <- lm(eval(correct*100) ~ incperc, data=filter(regdata))
m2 <- lm(eval(correct*100) ~ as.factor(country) + as.factor(y0_year) + incperc, data=filter(regdata))

stargazer(m1, m2,
          type="text", omit=c("country", "y0_year"), omit.stat=c("SER", "F"),
          star.cutoffs = c(0.05, 0.01))

#####################
# Table A4
#####################

m1 <- lm(eval(actual_cabinet_party*100) ~ incperc, data=filter(regdata))
m2 <- lm(eval(actual_cabinet_party*100) ~ as.factor(country) + as.factor(y0_year) + incperc, data=filter(regdata))
m3 <- lm(eval(best_cabinet_party*100) ~ incperc, data=filter(regdata))
m4 <- lm(eval(best_cabinet_party*100) ~ as.factor(country) + as.factor(y0_year) + incperc, data=filter(regdata))

stargazer(m1, m2, m3, m4,
          type="text", omit=c("country", "y0_year"), omit.stat=c("SER", "F"),
          star.cutoffs = c(0.05, 0.01))



# Mediation analysis


#####################
# Table A5 mediation analysis
#####################

# This part conducts mediation analysis using the "mediation" package (Tingley et al 2014).
# Due to the random nature of the simulations, figures may deviate slightly from those in the published paper.

complete <- complete %>% 
  mutate(y1_cong_100 = y1_cong*100)

med.fit_1 <- lm(actual_cabinet_party ~ incperc + as.factor(country) + as.factor(y0_year), data=complete)
out.fit_1 <- lm(y1_cong_100 ~ actual_cabinet_party + incperc + as.factor(country) + as.factor(y0_year), data=complete)
med.out_1 <- mediate(med.fit_1, out.fit_1, treat="incperc", mediator="actual_cabinet_party", robustSE = TRUE, sims = 100)
summary(med.out_1)

med.fit_2 <- lm(best_cabinet_party ~ incperc + as.factor(country) + as.factor(y0_year), data=complete)
out.fit_2 <- lm(y1_cong_100 ~ best_cabinet_party + incperc + as.factor(country) + as.factor(y0_year), data=complete)
med.out_2 <- mediate(med.fit_2, out.fit_2, treat="incperc", mediator="best_cabinet_party", robustSE = TRUE, sims = 100)
summary(med.out_2)

med.fit_3 <- lm(actual_dist ~ incperc + as.factor(country) + as.factor(y0_year), data=complete)
out.fit_3 <- lm(y1_cong_100 ~ actual_dist + incperc + as.factor(country) + as.factor(y0_year), data=complete)
med.out_3 <- mediate(med.fit_3, out.fit_3, treat="incperc", mediator="actual_dist", robustSE = TRUE, sims = 100)
summary(med.out_3)

med.fit_4 <- lm(best_dist ~ incperc + as.factor(country) + as.factor(y0_year), data=complete)
out.fit_4 <- lm(y1_cong_100 ~ best_dist + incperc + as.factor(country) + as.factor(y0_year), data=complete)
med.out_4 <- mediate(med.fit_4, out.fit_4, treat="incperc", mediator="best_dist", robustSE = TRUE, sims = 100)
summary(med.out_4)

med.fit_5 <- lm(best_votedfor ~ incperc + as.factor(country) + as.factor(y0_year), data=complete)
out.fit_5 <- lm(y1_cong_100 ~ best_votedfor + incperc + as.factor(country) + as.factor(y0_year), data=complete)
med.out_5 <- mediate(med.fit_5, out.fit_5, treat="incperc", mediator="best_votedfor", robustSE = TRUE, sims = 100)
summary(med.out_5)

med.fit_6 <- lm(voted ~ incperc + as.factor(country) + as.factor(y0_year), data=complete_2)
out.fit_6 <- lm(y1_cong_100 ~ voted + incperc + as.factor(country) + as.factor(y0_year), data=complete_2)
med.out_6 <- mediate(med.fit_6, out.fit_6, treat="incperc", mediator="voted", robustSE = TRUE, sims = 100)
summary(med.out_6)

#####################
# Descriptives
#####################

# Included countries and election years
parlgov_cabinetlist <- parlgov_cabinet %>%
  group_by(cabinet_id) %>%
  summarize(election_date = first(election_date),
            start_date = first(start_date))

complete_partylist <- complete %>%
  left_join(., parlgov_cabinetlist, by="cabinet_id") %>%
  group_by(country_name, election_date) %>%
  summarize(n = n()) %>%
  ungroup() %>%
  arrange(country_name, election_date) %>%
  mutate(year = year(as.Date(election_date)))

# Create table of unique country-years
complete_partylist %>%
  group_by(country_name, year) %>%
  summarize(n(), .groups="drop") %>%
  group_by(country_name) %>% 
  summarize(paste0(year, collapse=", ")) %>% 
  print(n=100)